** Reset the Stata session before building the table
capture log close          // close a log left open by an earlier run; no error if none is open
clear all                  // start with nothing in memory
set more off, permanently  // never pause output; this setting is also saved for later sessions
set seed 1234              // fixed random-number seed

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Table O.31: Gender Gaps in Performance, By Subject and Exam Day (All P1 subject scores) //////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

** Opening Phase 2 norm_scores dataset
use "Work Data/Gender_Phase2_long.dta", clear

*** Creating variables
* sub is a numeric, value-labelled copy of the string variable subject
encode subject, gen(sub)
* One 0/1 dummy per subject value: d_sub1, d_sub2, ... in the sorted order of subject
tab subject, gen(d_sub)
label var sub "Subject"

** Subject dummies
* The renames are positional, so they rely on the alphabetical order of the
* subject codes matching the names below.
* NOTE(review): assumes subject has exactly eight codes that sort as
* biology, chemistry, geography, history, language, mathematics, physics,
* portuguese -- confirm against the data.
rename d_sub1 Biology
rename d_sub2 Chemistry
rename d_sub3 Geography
rename d_sub4 History
rename d_sub5 Language
rename d_sub6 Mathematics
rename d_sub7 Physics
rename d_sub8 Portuguese
* Labels
label var Biology "Biology"
label var Chemistry "Chemistry"
label var Geography "Geography"
label var History "History"
* Full variable name: the abbreviation "Math" only resolves while varabbrev is on
label var Mathematics "Mathematics"
label var Physics "Physics"
label var Portuguese "Portuguese"
label var Language "Foreign Language"

* Days of admission exam
* Two subjects are assigned to each exam day. The subject dummies are checked
* from day 4 down to day 1, so that the highest day would win if more than one
* dummy were ever set; rows matching none of the dummies stay missing.
gen exam_day = cond(Mathematics==1 | Language==1,  4, /// Day 4: Mathematics and Language
               cond(Physics==1     | Geography==1, 3, /// Day 3: Physics and Geography
               cond(Chemistry==1   | History==1,   2, /// Day 2: Chemistry and History
               cond(Portuguese==1  | Biology==1,   1, .)))) // Day 1: Portuguese and Biology
tab exam_day
label var exam_day "Days of admission exam"

* Number of priority subjects per inscri2 group on each day of the admission exam
levelsof exam_day, local(days)
foreach d of local days {
	* Step 1: total of priority over the group's rows on this exam day
	* (the helper is missing on rows of other days)
	bysort inscri2: egen n_prior_day`d' = total(priority) if exam_day==`d'
	* Step 2: copy that total onto every row of the group
	bysort inscri2: egen n_priority_day`d' = max(n_prior_day`d')
	drop n_prior_day`d'
	summarize n_priority_day`d'
	label variable n_priority_day`d' "\# priority subjects in exam day `d'"
}
tab career_choice if n_priority_day1==2 // only 1 career has 2 priority subjects in the same day of the admission exam (Phonology)

** Create other priority variables
* Each indicator starts at 0 and is switched on row by row, using the row's own
* exam day and the matching n_priority_day count:
*   only_priority  : the row's subject is priority and the day has one priority subject
*   other_priority : the row's subject is not priority and the day has one priority subject
*   both_priority  : the row's subject is priority and the day has two priority subjects
gen only_priority  = 0
gen other_priority = 0
gen both_priority  = 0

levelsof exam_day, local(days)
foreach d of local days {
	replace only_priority  = 1 if priority==1 & exam_day==`d' & n_priority_day`d'==1
	replace other_priority = 1 if priority==0 & exam_day==`d' & n_priority_day`d'==1
	replace both_priority  = 1 if priority==1 & exam_day==`d' & n_priority_day`d'==2
}

** P1 scores: second to fourth powers of the normalized subject-specific P1 score
forvalues power = 2/4 {
	gen norm_p1score`power' = norm_p1score^`power'
	summarize norm_p1score`power'
}

*********************************************************************************
****************   Relative performances ****************************************
*********************************************************************************

** ENEM: each score minus its mean within year and gender
foreach score in enem norm_enem_w {
	bysort year female: egen `score'_ave_g = mean(`score')
	gen `score'_g = `score' - `score'_ave_g
	bysort year female: summarize `score'_g
}
* Only the norm_enem_w group mean is removed; enem_ave_g stays in the data
drop norm_enem_w_ave_g

** Phase 1 scores: score minus its mean within year, subject and gender

tab year, sum(norm_p1score)
bysort year subject female: egen gs_norm_p1score_ave = mean(norm_p1score)
gen gs_norm_p1score = norm_p1score - gs_norm_p1score_ave
bysort year female subject: summarize gs_norm_p1score
drop gs_norm_p1score_ave

*********************************************************************************
**************** Main sample ****************************************************
*********************************************************************************

* 1) Only years before the affirmative action took place
*    (rows flagged aa_year==1 are removed); year 2000 is removed as well
drop if aa_year==1 | year==2000
tab year

*****************************************************************************
****************  Create new variables **************************************
******************************************************************************

* The by-group commands below need the data sorted on inscri2
sort inscri2
order subject gs_norm_p1score
label var gs_norm_p1score "Phase 1 scores"

* Short titles for the variable labels, matched by position to the sorted
* subject codes returned by levelsof
local short_titles Bio Chem Geog Hist EngFr Math Phys Port
local pos = 0
levelsof subject, local(subjects)
foreach s of local subjects {
	local ++pos

	* Relative performance in phase 1, similar to main specification:
	* mean of gs_norm_p1score over the group's rows for this subject ...
	by inscri2: egen aux_`s' = mean(gs_norm_p1score) if subject=="`s'"
	* ... spread to every row of the group
	by inscri2: egen norm_p1score_`s' = max(aux_`s')
	drop aux_`s'

	* Only the first eight subject codes have a short title
	if `pos' <= 8 local title : word `pos' of `short_titles'

	label var norm_p1score_`s' "P1 scores `title' "
}

* Group-level P1 relative scores used as regression controls
* (no variable for the "lang" subject is included)
global norm_p1score_all norm_p1score_biol norm_p1score_chem norm_p1score_geog norm_p1score_hist norm_p1score_math norm_p1score_phy norm_p1score_port

* Leave-one-out lists: global biol0 holds every control except norm_p1score_biol,
* global chem0 every control except norm_p1score_chem, and so on
foreach s in biol chem geog hist math phy port {
	local own norm_p1score_`s'
	global `s'0 : list global(norm_p1score_all) - own
}

*********************************************************************************
****************   Regressions by exam day **************************************
*********************************************************************************

* Rows for which none of the three priority indicators is switched on
gen no_priority = 1 - (only_priority + other_priority + both_priority)
tab no_priority

* Drop any estimation results stored earlier in the session
estimates clear

* Outcomes looped over in the regressions
global outcome_list "norm_score"

* For each outcome and each ENEM control:
*   1) by subject and sex, regress the outcome on the priority indicators, the
*      ENEM control and the P1 controls; store as reg3<sex>_<subject>
*   2) by subject, a fully sex-interacted regression plus a joint test of all
*      female interactions; store as reg3other_<subject>
*   3) suest-based equality tests across subjects (within sex and pooled)
*   4) export the female-interaction coefficients to a LaTeX table
* estadd and esttab come from the user-written estout package.
local count_outcome = 1
foreach outcome in $outcome_list {

	if `count_outcome' == 1 {
		local outcome_name  = "scores"
		local outcome_title = "Scores"
	}

	local count_enem = 1
	foreach j in norm_enem_w_g {
		if `count_enem' == 1 loc title_enem = "ENEM"

		levelsof subject, local(levels)
		foreach i of local levels {
			di "`i'"
			di "`j'"

			* The subject coded "lang" is skipped in every regression
			if "`i'" != "lang" {

				forvalues sex = 1/2 { // 1= male, 2=female
					reg `outcome' only_priority other_priority both_priority `j' $norm_p1score_all ///
						if subject=="`i'" & sex==`sex', noomitted
					* Share of rows with no priority indicator in this subject/sex cell
					sum no_priority if subject=="`i'" & sex==`sex'
					estadd scalar shareno = r(mean)
					* Joint test of the P1 controls of all the other subjects
					test ${`i'0}
					estadd scalar f_test0 = r(p)
					estimates store reg3`sex'_`i'
					estadd local p1subject "Yes": reg3`sex'_`i'
				}

				* TEST ACROSS GENDER
				* Every regressor is interacted with sex; the test is joint over
				* all the sex==2 interaction terms
				reg `outcome' i.sex##c.(only_priority other_priority both_priority `j' $norm_p1score_all) if subject=="`i'"
				test 2.sex#c.only_priority 2.sex#c.other_priority 2.sex#c.both_priority 2.sex#c.`j' ///
					2.sex#c.norm_p1score_biol 2.sex#c.norm_p1score_chem 2.sex#c.norm_p1score_geog ///
					2.sex#c.norm_p1score_hist 2.sex#c.norm_p1score_math 2.sex#c.norm_p1score_phy ///
					2.sex#c.norm_p1score_port
				estadd scalar p_value_across_gender = r(p)
				estimates store reg3other_`i'
			}
		}

		* TEST ACROSS SUBJECTS (WITHIN GENDER)
		* The p-value is attached to the stored port model of each sex.
		* NOTE(review): the math model is not part of these suest tests -- confirm intended.
		forvalues sex = 1/2 {
			* Single-value loop written as the documented range 3/3
			forvalues count = 3/3 {
				suest reg`count'`sex'_port reg`count'`sex'_biol reg`count'`sex'_chem reg`count'`sex'_hist reg`count'`sex'_phy reg`count'`sex'_geog
				test [reg`count'`sex'_port_mean  = reg`count'`sex'_biol_mean = reg`count'`sex'_chem_mean = reg`count'`sex'_hist_mean = reg`count'`sex'_phy_mean = reg`count'`sex'_geog_mean ]
				local aux = `r(p)'
				est restore reg`count'`sex'_port
				estadd scalar p_value_across_sub=`aux'
				estimates store reg`count'`sex'_port
			}
		}

		* TEST - Across Subjects/Gender
		* The p-value is attached to the stored sex-interacted port model
		forvalues count = 3/3 {
			suest reg`count'1_port reg`count'1_biol reg`count'1_chem reg`count'1_hist reg`count'1_phy reg`count'1_geog reg`count'2_port reg`count'2_biol reg`count'2_chem reg`count'2_hist reg`count'2_phy reg`count'2_geog
			test [reg`count'1_port_mean  = reg`count'1_biol_mean = reg`count'1_chem_mean = reg`count'1_hist_mean = reg`count'1_phy_mean = reg`count'1_geog_mean =reg`count'2_port_mean  = reg`count'2_biol_mean = reg`count'2_chem_mean = reg`count'2_hist_mean = reg`count'2_phy_mean = reg`count'2_geog_mean]
			local aux = `r(p)'
			est restore reg`count'other_port
			estadd scalar p_value_across_both = `aux'
			estimates store reg`count'other_port
		}

		* LaTeX table of the female-interaction coefficients, one column per subject.
		* Seven models are exported, so seven column titles and a seven-entry
		* group pattern are supplied.
		forvalues count = 3/3 {
			if `count' == 3 loc title = "_p1_all"

			esttab reg`count'other_port reg`count'other_biol reg`count'other_chem reg`count'other_hist reg`count'other_phy reg`count'other_geog reg`count'other_math ///
				using "Output/`outcome_name'_sex_`j'`title'_p1scores_diffs.tex", ///
				se star(* 0.10 ** 0.05 *** 0.01) booktabs nogap b(%7.3f) se(%7.3f) replace f label nonumber ///
				mtitle("Port" "Bio" "Chem" "Hist" "Phys" "Geog" "Math") ///
				mgroups("Day 1" "Day 2" "Day 3" "Day 4", pattern(1 0 1 0 1 0 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
				stats(p_value_across_gender p_value_across_both, fmt(%7.3f %7.3f) labels("P-value (across gender)" "P-value (across gender and subjects)")) ///
				unstack noomitted ///
				keep(2.sex#c.only_priority 2.sex#c.other_priority 2.sex#c.both_priority 2.sex#c.`j' ///
					2.sex#c.norm_p1score_biol 2.sex#c.norm_p1score_chem 2.sex#c.norm_p1score_geog ///
					2.sex#c.norm_p1score_hist 2.sex#c.norm_p1score_math 2.sex#c.norm_p1score_phy ///
					2.sex#c.norm_p1score_port) ///
				coeflabels(2.sex#c.only_priority "Only priority" 2.sex#c.other_priority "Only other priority" ///
					2.sex#c.both_priority "Both priorities" 2.sex#c.`j' "`title_enem'" ///
					2.sex#c.norm_p1score_biol "P1 scores Bio" 2.sex#c.norm_p1score_chem "P1 scores Chem" ///
					2.sex#c.norm_p1score_geog "P1 scores Geog" 2.sex#c.norm_p1score_hist "P1 scores Hist" ///
					2.sex#c.norm_p1score_math "P1 scores Math" 2.sex#c.norm_p1score_phy "P1 scores Phys" ///
					2.sex#c.norm_p1score_port "P1 scores Port") ///
				refcat(2.sex#c.only_priority " \\ \multicolumn{8}{l}{\textit{Gender Differences (Women - Men)}} \\", nolabel)
		}

		local ++count_enem
	}
	local ++count_outcome
}



* This file never opens a log, so a bare log close would stop the run with
* "no log file open"; capture closes a log only if one happens to be open.
capture log close




